Univariate Arima Models

Predicting COVID-19 Deaths (STAT 390)

Author

Ryan Nguyen

1. Load Libraries, Seed, & Data

Code
# Load necessary libraries
library(forecast)
library(tseries)
library(tidyverse)
library(tidymodels)
library(kableExtra)
library(modeltime)
library(timetk)
library(cowplot)
library(Metrics)


# setting the seed
set.seed(123)

# Loading Data  ----------------------------------------------------------------
daily_deaths <- read_csv("data/new_daily_deaths.csv") %>% 
  select(daily_deaths)

east <- read_csv("data/east_daily.csv")

midwest <- read_csv("data/midwest_daily.csv")

south <- read_csv("data/south_daily.csv")

west <- read_csv("data/west_daily.csv")

2. Data Splitting

Code
# Data Splitting ---------------------------------------------------------------
east_splits <- time_series_split(east, initial = "1022 days", assess = "114 days")
east_train <- training(east_splits)
east_test <- testing(east_splits)

midwest_splits <- time_series_split(midwest, initial = "1022 days", assess = "114 days")
midwest_train <- training(midwest_splits)
midwest_test <- testing(midwest_splits)

south_splits <- time_series_split(south, initial = "1022 days", assess = "114 days")
south_train <- training(south_splits)
south_test <- testing(south_splits)

west_splits <- time_series_split(west, initial = "1022 days", assess = "114 days")
west_train <- training(west_splits)
west_test <- testing(west_splits)

## Convert Into Time Series ----------------------------------------------------
east_train_ts <- ts(east_train %>%
                      select(daily_deaths))
east_test_ts <- ts(east_test %>%
                     select(daily_deaths))
east_ts <- ts(east %>% 
                select(daily_deaths))

midwest_train_ts <- ts(midwest_train %>%
                      select(daily_deaths))
midwest_test_ts <- ts(midwest_test %>%
                     select(daily_deaths))
midwest_ts <- ts(midwest %>% 
                select(daily_deaths))

south_train_ts <- ts(south_train %>%
                    select(daily_deaths))
south_test_ts <- ts(south_test %>%
                   select(daily_deaths))
south_ts <- ts(south %>%
                 select(daily_deaths))

west_train_ts <- ts(west_train %>%
                   select(daily_deaths))
west_test_ts <- ts(west_test %>%
                  select(daily_deaths))
west_ts <- ts(west %>%
                select(daily_deaths))

## Plotting Training and Testing -----------------------------------------------
par(mfrow=c(2,2))
ggplot() +
  geom_line(data = east_test, aes(x = date, y = daily_deaths, color = "turquoise")) +
  geom_point(data = east_test, aes(x = date, y = daily_deaths, color = "turquoise"), size = .5) +
  geom_line(data = east_train, aes(x = date, y = daily_deaths, color = "orange")) +
  geom_point(data = east_train, aes(x = date, y = daily_deaths, color = "orange"), size = .5) +
  scale_color_manual(name="Data Set", values = c("turquoise", "orange"), labels = c("Training", "Testing")) +
  labs(y = "Daily Deaths",
       x = "Date",
       title = "East Region Training vs. Testing Data")

Code
ggplot() +
  geom_line(data = midwest_test, aes(x = date, y = daily_deaths, color = "turquoise")) +
  geom_point(data = midwest_test, aes(x = date, y = daily_deaths, color = "turquoise"), size = .5) +
  geom_line(data = midwest_train, aes(x = date, y = daily_deaths, color = "orange")) +
  geom_point(data = midwest_train, aes(x = date, y = daily_deaths, color = "orange"), size = .5) +
  scale_color_manual(name="Data Set", values = c("turquoise", "orange"), labels = c("Training", "Testing")) +
  labs(y = "Daily Deaths",
       x = "Date",
       title = "Midwest Region Training vs. Testing Data")

Code
ggplot() +
  geom_line(data = south_test, aes(x = date, y = daily_deaths, color = "turquoise")) +
  geom_point(data = south_test, aes(x = date, y = daily_deaths, color = "turquoise"), size = .5) +
  geom_line(data = south_train, aes(x = date, y = daily_deaths, color = "orange")) +
  geom_point(data = south_train, aes(x = date, y = daily_deaths, color = "orange"), size = .5) +
  scale_color_manual(name="Data Set", values = c("turquoise", "orange"), labels = c("Training", "Testing")) +
  labs(y = "Daily Deaths",
       x = "Date",
       title = "South Region Training vs. Testing Data")

Code
ggplot() +
  geom_line(data = west_test, aes(x = date, y = daily_deaths, color = "turquoise")) +
  geom_point(data = west_test, aes(x = date, y = daily_deaths, color = "turquoise"), size = .5) +
  geom_line(data = west_train, aes(x = date, y = daily_deaths, color = "orange")) +
  geom_point(data = west_train, aes(x = date, y = daily_deaths, color = "orange"), size = .5) +
  scale_color_manual(name="Data Set", values = c("turquoise", "orange"), labels = c("Training", "Testing")) +
  labs(y = "Daily Deaths",
       x = "Date",
       title = "West Region Training vs. Testing Data")

3. Testing for Stationarity

Code
### Using Augmented Dickey-Fuller Test -----------------------------------------
adf_east <- adf.test(east_ts)
print(adf_east) # p-value is 0.01 < 0.05, implying it is stationary

    Augmented Dickey-Fuller Test

data:  east_ts
Dickey-Fuller = -4.5566, Lag order = 10, p-value = 0.01
alternative hypothesis: stationary
Code
adf_midwest <- adf.test(midwest_ts)
print(adf_midwest) # p-value is 0.325 > 0.05, implying it is NOT stationary

    Augmented Dickey-Fuller Test

data:  midwest_ts
Dickey-Fuller = -2.5984, Lag order = 10, p-value = 0.325
alternative hypothesis: stationary
Code
diff_midwest_ts <- diff(midwest_ts)
diff_adf_midwest <- adf.test(diff_midwest_ts)
print(diff_adf_midwest) # p-value is 0.01 < 0.05, implying it is stationary with 1 diff

    Augmented Dickey-Fuller Test

data:  diff_midwest_ts
Dickey-Fuller = -13.43, Lag order = 10, p-value = 0.01
alternative hypothesis: stationary
Code
adf_south <- adf.test(south_ts)
print(adf_south) # p-value is 0.3018 > 0.05, implying it is NOT stationary

    Augmented Dickey-Fuller Test

data:  south_ts
Dickey-Fuller = -2.6533, Lag order = 10, p-value = 0.3018
alternative hypothesis: stationary
Code
diff_south_ts <- diff(south_ts)
diff_adf_south <- adf.test(diff_south_ts)
print(diff_adf_south) # p-value is 0.01 < 0.05, implying it is stationary with 1 diff

    Augmented Dickey-Fuller Test

data:  diff_south_ts
Dickey-Fuller = -11.36, Lag order = 10, p-value = 0.01
alternative hypothesis: stationary
Code
adf_west <- adf.test(west_ts)
print(adf_west) # p-value is 0.4098 > 0.05, implying it is NOT stationary

    Augmented Dickey-Fuller Test

data:  west_ts
Dickey-Fuller = -2.3981, Lag order = 10, p-value = 0.4098
alternative hypothesis: stationary
Code
diff_west_ts <- diff(west_ts)
diff_adf_west <- adf.test(diff_west_ts)
print(diff_adf_west)  # p-value is 0.01 < 0.05, implying it is stationary with 1 diff

    Augmented Dickey-Fuller Test

data:  diff_west_ts
Dickey-Fuller = -12.247, Lag order = 10, p-value = 0.01
alternative hypothesis: stationary
Code
### Making Testing/Training Stationary -----------------------------------------
# midwest_train_ts <- diff(midwest_train_ts)
# midwest_test_ts <- diff(midwest_test_ts)

# midwest_train_ts <- as.data.frame(midwest_train_ts) %>% 
#   mutate(daily_deaths = case_when((daily_deaths < 0) ~ 0,
#                                   .default = daily_deaths))
# midwest_test_ts <- as.data.frame(midwest_test_ts) %>% 
#   mutate(daily_deaths = case_when((daily_deaths < 0) ~ 0,
#                                   .default = daily_deaths))
# 
# midwest_train_ts <- ts(midwest_train_ts)
# midwest_test_ts <- ts(midwest_test_ts)

# south_train_ts <- diff(south_train_ts)
# south_test_ts <- diff(south_test_ts)

# south_train_ts <- as.data.frame(south_train_ts) %>% 
#   mutate(daily_deaths = case_when((daily_deaths < 0) ~ 0,
#                                   .default = daily_deaths))
# south_test_ts <- as.data.frame(south_test_ts) %>% 
#   mutate(daily_deaths = case_when((daily_deaths < 0) ~ 0,
#                                   .default = daily_deaths))
# 
# south_train_ts <- ts(south_train_ts)
# south_test_ts <- ts(south_test_ts)

# west_train_ts <- diff(west_train_ts)
# west_test_ts <- diff(west_test_ts)

# west_train_ts <- as.data.frame(west_train_ts) %>% 
#   mutate(daily_deaths = case_when((daily_deaths < 0) ~ 0,
#                                   .default = daily_deaths))
# west_test_ts <- as.data.frame(west_test_ts) %>% 
#   mutate(daily_deaths = case_when((daily_deaths < 0) ~ 0,
#                                   .default = daily_deaths))
# 
# west_train_ts <- ts(west_train_ts)
# west_test_ts <- ts(west_test_ts)

4. ACF & PACF Plots

Code
## ACF & PACF Plots ------------------------------------------------------------
par(mfrow=c(3, 1))
acf(east_ts, main = "East")
pacf(east_ts, main = "East")
plot(east_ts, main = "Daily Deaths: East")

Code
acf(midwest_ts, main = "Midwest")
pacf(midwest_ts, main = "Midwest")
plot(midwest_ts, main = "Daily Deaths: Midwest")

Code
acf(south_ts, main = "South")
pacf(south_ts, main = "South")
plot(south_ts, main = "Daily Deaths: South")

Code
acf(west_ts, main = "West")
pacf(west_ts, main = "West")
plot(south_ts, main = "Daily Deaths: South")

Code
plot(west_ts, main = "Daily Deaths: West")

5. Time Series Decomposition

Code
# total daily death decomposition
daily_ts <- ts(daily_deaths, frequency = 365)

decomposed_ts <- decompose(daily_ts)

# Plot the original time series and the decomposed components
par(mfrow=c(2,1))
plot(daily_ts, main = "Total Original Time Series", ylab = "Value", col = "blue")
plot(decomposed_ts$trend, main = "Trend Component", ylab = "Value", col = "red")

Code
plot(decomposed_ts$seasonal, main = "Seasonal Component", ylab = "Value", col = "green")
plot(decomposed_ts$random, main = "Residual Component", ylab = "Value", col = "purple")

Code
# East Decomposition -----------------------------------------------------------
east_daily <- east %>% 
  select(daily_deaths)

east_ts <- ts(east_daily, frequency = 365)

decomposed_east <- decompose(east_ts)

# Plot the original time series and the decomposed components
par(mfrow=c(2,1))
plot(east_ts, main = "East Original Time Series", ylab = "Value", col = "blue")
plot(decomposed_east$trend, main = "Trend Component", ylab = "Value", col = "red")

Code
plot(decomposed_east$seasonal, main = "Seasonal Component", ylab = "Value", col = "green")
plot(decomposed_east$random, main = "Residual Component", ylab = "Value", col = "purple")

Code
## Midwest Decomposition ----------------------------------------------------------
midwest_daily <- midwest %>% 
  select(daily_deaths)

midwest_ts <- ts(midwest_daily, frequency = 365)

decomposed_midwest <- decompose(midwest_ts)

# Plot the original time series and the decomposed components
par(mfrow=c(2,1))
plot(midwest_ts, main = "Midwest Original Time Series", ylab = "Value", col = "blue")
plot(decomposed_midwest$trend, main = "Trend Component", ylab = "Value", col = "red")

Code
plot(decomposed_midwest$seasonal, main = "Seasonal Component", ylab = "Value", col = "green")
plot(decomposed_midwest$random, main = "Residual Component", ylab = "Value", col = "purple")

Code
## South Decomposition ----------------------------------------------------------
south_daily <- south %>% 
  select(daily_deaths)

south_ts <- ts(south_daily, frequency = 365)

decomposed_south <- decompose(south_ts)

# Plot the original time series and the decomposed components
par(mfrow=c(2,1))
plot(south_ts, main = "South Original Time Series", ylab = "Value", col = "blue")
plot(decomposed_south$trend, main = "Trend Component", ylab = "Value", col = "red")

Code
plot(decomposed_south$seasonal, main = "Seasonal Component", ylab = "Value", col = "green")
plot(decomposed_south$random, main = "Residual Component", ylab = "Value", col = "purple")

Code
## West Decomposition ----------------------------------------------------------
west_daily <- west %>% 
  select(daily_deaths)

west_ts <- ts(west_daily, frequency = 365)

decomposed_west <- decompose(west_ts)

# Plot the original time series and the decomposed components
par(mfrow=c(2,1))
plot(west_ts, main = "West Original Time Series", ylab = "Value", col = "blue")
plot(decomposed_west$trend, main = "Trend Component", ylab = "Value", col = "red")

Code
plot(decomposed_west$seasonal, main = "Seasonal Component", ylab = "Value", col = "green")
plot(decomposed_west$random, main = "Residual Component", ylab = "Value", col = "purple")

6. ARIMA Model

6.1 Initial Model

Code
# Building ARIMA Model ---------------------------------------------------------
east_arima <- arima(east_train_ts, order=c(2,0,0))
midwest_arima <- arima(midwest_train_ts, order=c(7,1,0))
south_arima <- arima(south_train_ts, order=c(7,1,0))
west_arima <- arima(west_train_ts, order=c(7,1,0))

## Summary & Forecast ---------------------------------------------------------
summary(east_arima)

Call:
arima(x = east_train_ts, order = c(2, 0, 0))

Coefficients:
         ar1     ar2  intercept
      0.8946  0.0181   198.4613
s.e.  0.0312  0.0313    39.1024

sigma^2 estimated as 12157:  log likelihood = -6257.33,  aic = 12522.66

Training set error measures:
                     ME     RMSE      MAE  MPE MAPE      MASE         ACF1
Training set 0.02128508 110.2581 64.66103 -Inf  Inf 0.9917366 -0.003359883
Code
summary(midwest_arima)

Call:
arima(x = midwest_train_ts, order = c(7, 1, 0))

Coefficients:
          ar1      ar2      ar3      ar4     ar5      ar6     ar7
      -0.6984  -0.6552  -0.6662  -0.6405  -0.625  -0.5942  0.2158
s.e.   0.0306   0.0326   0.0331   0.0336   0.033   0.0325  0.0305

sigma^2 estimated as 13720:  log likelihood = -6315.71,  aic = 12647.41

Training set error measures:
                    ME     RMSE      MAE MPE MAPE      MASE        ACF1
Training set 0.2590604 117.0771 67.37069 NaN  Inf 0.4941415 -0.01395866
Code
summary(south_arima)

Call:
arima(x = south_train_ts, order = c(7, 1, 0))

Coefficients:
          ar1      ar2     ar3      ar4      ar5      ar6     ar7
      -0.7847  -0.7948  -0.782  -0.7725  -0.7874  -0.6062  0.0069
s.e.   0.0313   0.0349   0.035   0.0351   0.0349   0.0348  0.0312

sigma^2 estimated as 40704:  log likelihood = -6869.67,  aic = 13755.34

Training set error measures:
                   ME    RMSE      MAE MPE MAPE      MASE         ACF1
Training set 0.321387 201.654 117.0239 NaN  Inf 0.5717169 -0.001802532
Code
summary(west_arima)

Call:
arima(x = west_train_ts, order = c(7, 1, 0))

Coefficients:
          ar1      ar2      ar3      ar4      ar5      ar6     ar7
      -0.7392  -0.7212  -0.7311  -0.7477  -0.7069  -0.5949  0.0674
s.e.   0.0312   0.0341   0.0343   0.0339   0.0342   0.0340  0.0311

sigma^2 estimated as 8677:  log likelihood = -6080.76,  aic = 12177.52

Training set error measures:
                    ME     RMSE      MAE  MPE MAPE      MASE         ACF1
Training set 0.2961085 93.10308 54.88534 -Inf  Inf 0.5462533 -0.006060256
Code
east_values <- forecast(east_arima, h = length(east_test_ts))
midwest_values <- forecast(midwest_arima, h = length(midwest_test_ts))
south_values <- forecast(south_arima, h = length(south_test_ts))
west_values <- forecast(west_arima, h = length(west_test_ts))

6.2 Accuracy

Code
## Set Up Plots
east_test_plot <- as.data.frame(east_test_ts) %>% 
  mutate(num = seq(1023, (1023+113)))

midwest_test_plot <- as.data.frame(midwest_test_ts) %>% 
  mutate(num = seq(1023, (1023+113)))

south_test_plot <- as.data.frame(south_test_ts) %>% 
  mutate(num = seq(1023, (1023+113)))

west_test_plot <- as.data.frame(west_test_ts) %>% 
  mutate(num = seq(1023, (1023+113)))
Code
## Evaluating Accuracy ---------------------------------------------------------
east_initial_arima_acc <- forecast::accuracy(east_values)

# east_initial_arima_acc %>% 
#   kbl(caption = "<center>Initial East Accuracy<center>") %>% 
#   kable_styling()

east_pred <- as.data.frame(east_values$mean)

east_pred2 <- cbind(east_pred, east_test_plot)

east_initial_arima_mae <-mae(east_pred2$daily_deaths, east_pred2$x)
east_initial_arima_mase <-mase(east_pred2$daily_deaths, east_pred2$x)

### East Region
east_initial_arima_plot <-ggplot(east_pred2) +
  geom_line(aes(x = num, y = daily_deaths), color = "dodgerblue", linewidth = .75) +
  geom_line(aes(x = num, y = x), color = "orange", linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Nov 2022", "Jan 2023", "March 2023")) +
  labs(title = "East Region Initial ARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("orange", "dodgerblue"), labels = c("Actual", "Predicted"))+
  theme_bw()

east_initial_arima_plot

Code
### Midwest Region

midwest_initial_arima_acc <- forecast::accuracy(midwest_values)

# midwest_initial_arima_acc %>% 
#   kbl(caption = "<center>Initial Midwest Accuracy<center>") %>% 
#   kable_styling()

midwest_pred <- as.data.frame(midwest_values$mean)

midwest_pred2 <- cbind(midwest_pred, midwest_test_plot)
midwest_initial_arima_mae <-mae(midwest_pred2$daily_deaths, midwest_pred2$x)
midwest_initial_arima_mase <-mase(midwest_pred2$daily_deaths, midwest_pred2$x)

midwest_initial_arima_plot <-ggplot(midwest_pred2) +
  geom_line(aes(x = num, y = daily_deaths), color = "dodgerblue", linewidth = .75) +
  geom_line(aes(x = num, y = x), color = "orange", linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Nov 2022", "Jan 2023", "March 2023")) +
  labs(title = "midwest Region Initial ARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("orange", "dodgerblue"), labels = c("Actual", "Predicted"))+
  theme_bw()

midwest_initial_arima_plot

Code
### South Region

south_initial_arima_acc <- forecast::accuracy(south_values)

south_initial_arima_acc %>% 
  kbl(caption = "<center>Initial South Accuracy<center>") %>% 
  kable_styling()
Initial South Accuracy
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.321387 201.654 117.0239 NaN Inf 0.5717169 -0.0018025
Code
south_pred <- as.data.frame(south_values$mean)
south_pred2 <- cbind(south_pred, south_test_plot)
south_initial_arima_mae <-mae(south_pred2$daily_deaths, south_pred2$x)
south_initial_arima_mase <-mase(south_pred2$daily_deaths, east_pred2$x)

south_initial_arima_plot <-ggplot(south_pred2) +
  geom_line(aes(x = num, y = daily_deaths), color = "dodgerblue", linewidth = .75) +
  geom_line(aes(x = num, y = x), color = "orange", linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Nov 2022", "Jan 2023", "March 2023")) +
  labs(title = "South Region Initial ARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("orange", "dodgerblue"), labels = c("Actual", "Predicted"))+
  theme_bw()

south_initial_arima_plot

Code
### West Region

west_initial_arima_acc <- forecast::accuracy(west_values)

# west_initial_arima_acc %>% 
#   kbl(caption = "<center>Initial West Accuracy<center>") %>% 
#   kable_styling()

west_pred <- as.data.frame(west_values$mean)

west_pred2 <- cbind(west_pred, west_test_plot)
west_initial_arima_mae <-mae(west_pred2$daily_deaths, west_pred2$x)
west_initial_arima_mase <-mase(west_pred2$daily_deaths, west_pred2$x)

west_initial_arima_plot <-ggplot(west_pred2) +
  geom_line(aes(x = num, y = daily_deaths), color = "dodgerblue", linewidth = .75) +
  geom_line(aes(x = num, y = x), color = "orange", linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Nov 2022", "Jan 2023", "March 2023")) +
  labs(title = "West Region Initial ARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("orange", "dodgerblue"), labels = c("Actual", "Predicted"))+
  theme_bw()

west_initial_arima_plot

6.4 Rebuilding ARIMA Model

Code
# Rebuilding ARIMA Models ------------------------------------------------------
east_arima <- arima(east_train_ts, order=c(7,0,7))
midwest_arima <- arima(midwest_train_ts, order=c(8,1,8))
south_arima <- arima(south_train_ts, order=c(6,1,7))
west_arima <- arima(west_train_ts, order=c(8,1,8))


east_values <- forecast(east_arima, h = length(east_test_ts))
midwest_values <- forecast(midwest_arima, h = length(midwest_test_ts))
south_values <- forecast(south_arima, h = length(south_test_ts))
west_values <- forecast(west_arima, h = length(west_test_ts))

## Evaluating Accuracy & Plots -------------------------------------------------

east_tuned_arima_acc <- forecast::accuracy(east_values)

# east_tuned_arima_acc %>% 
#   kbl(caption = "<center>Tuned East Accuracy<center>") %>% 
#   kable_styling()

east_pred <- as.data.frame(east_values$mean)
east_pred2 <- cbind(east_pred, east_test_plot)
east_tuned_arima_mae <-mae(east_pred2$daily_deaths, east_pred2$x)
east_tuned_arima_mase <-mase(east_pred2$daily_deaths, east_pred2$x)

### East Region
east_tuned_arima_plot <-ggplot(east_pred2) +
  geom_line(aes(x = num, y = daily_deaths, color = "dodgerblue"), linewidth = .75) +
  geom_line(aes(x = num, y = x, color = "orange"), linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Nov 2022", "Jan 2023", "March 2023")) +
  labs(title = "East Region Tuned ARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("dodgerblue", "orange"), labels = c("Actual", "Predicted"))+
  theme_bw() +
  theme(legend.title = element_blank())

east_tuned_arima_plot

Code
### Midwest Region

midwest_tuned_arima_acc <- forecast::accuracy(midwest_values)

# midwest_tuned_arima_acc %>% 
#   kbl(caption = "<center>Tuned Midwest Accuracy<center>") %>% 
#   kable_styling()

midwest_pred <- as.data.frame(midwest_values$mean)

midwest_pred2 <- cbind(midwest_pred, midwest_test_plot)
midwest_tuned_arima_mae <-mae(midwest_pred2$daily_deaths, midwest_pred2$x)
midwest_tuned_arima_mase <-mase(midwest_pred2$daily_deaths, midwest_pred2$x)

# midwest_pred2 <- midwest_pred2 %>% 
#   mutate(x = case_when((x < 0) ~ 0,
#                        .default = x))

midwest_tuned_arima_plot <-ggplot(midwest_pred2) +
  geom_line(aes(x = num, y = daily_deaths, color = "dodgerblue"), linewidth = .75) +
  geom_line(aes(x = num, y = x, color = "orange"), linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Dec 2022", "Jan 2023", "March 2023")) +
  labs(title = "Midwest Region Tuned ARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("dodgerblue", "orange"), labels = c("Actual", "Predicted")) +
  theme_bw() +
  theme(legend.title = element_blank())

midwest_tuned_arima_plot

Code
### South Region

south_tuned_arima_acc <- forecast::accuracy(south_values)

# south_tuned_arima_acc %>% 
#   kbl(caption = "<center>Tuned South Accuracy<center>") %>% 
#   kable_styling()

south_pred <- as.data.frame(south_values$mean)

south_pred2 <- cbind(south_pred, south_test_plot)
south_tuned_arima_mae <-mae(south_pred2$daily_deaths, south_pred2$x)
south_tuned_arima_mase <-mase(south_pred2$daily_deaths, south_pred2$x)

# south_pred2 <- south_pred2 %>% 
#   mutate(x = case_when((x < 0) ~ 0,
#                        .default = x))

south_tuned_arima_plot <-ggplot(south_pred2) +
  geom_line(aes(x = num, y = daily_deaths, color = "dodgerblue"), linewidth = .75) +
  geom_line(aes(x = num, y = x, color = "orange"), linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Dec 2022", "Jan 2023", "March 2023")) +
  labs(title = "South Region Tuned ARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("dodgerblue", "orange"), labels = c("Actual", "Predicted")) +
  theme_bw() +
  theme(legend.title = element_blank()) 

south_tuned_arima_plot

Code
### West Region
west_tuned_arima_acc <- forecast::accuracy(west_values)

# west_tuned_arima_acc %>% 
#   kbl(caption = "<center>Tuned West Accuracy<center>") %>% 
#   kable_styling()

west_pred <- as.data.frame(west_values$mean)

west_pred2 <- cbind(west_pred, west_test_plot)
west_tuned_arima_mae <-mae(west_pred2$daily_deaths, west_pred2$x)
west_tuned_arima_mase <-mase(west_pred2$daily_deaths, west_pred2$x)

# west_pred2 <- west_pred2 %>% 
#   mutate(x = case_when((x < 0) ~ 0,
#                        .default = x))

west_tuned_arima_plot <-ggplot(west_pred2) +
  geom_line(aes(x = num, y = daily_deaths, color = "dodgerblue"), linewidth = .75) +
  geom_line(aes(x = num, y = x, color = "orange"), linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Dec 2022", "Jan 2023", "March 2023")) +
  labs(title = "West Region Tuned ARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("dodgerblue", "orange"), labels = c("Actual", "Predicted")) +
  theme_bw() +
  theme(legend.title = element_blank())

west_tuned_arima_plot

Code
# saving results
save(east_tuned_arima_acc, east_initial_arima_acc, east_initial_arima_plot, east_tuned_arima_plot,
     midwest_tuned_arima_acc, midwest_initial_arima_acc, midwest_initial_arima_plot,midwest_tuned_arima_plot,
     south_tuned_arima_acc, south_initial_arima_acc, south_initial_arima_plot, south_tuned_arima_plot,
     west_tuned_arima_acc, west_initial_arima_acc, west_initial_arima_plot, west_tuned_arima_plot,
     east_initial_arima_mae, east_initial_arima_mase, east_tuned_arima_mae, east_tuned_arima_mase,
     midwest_initial_arima_mae, midwest_initial_arima_mase, midwest_tuned_arima_mae, midwest_tuned_arima_mase,
     south_initial_arima_mae, south_initial_arima_mase, south_tuned_arima_mae, south_tuned_arima_mase,
     west_initial_arima_mae, west_initial_arima_mase, west_tuned_arima_mae, west_tuned_arima_mase,
     file = "results/uni_arima.rda"
     )

7. SARIMA Model

7.1 Initial SARIMA Model

Code
# Building SARIMA model  ------------------------------------------------------
east_sarima <- arima(east_train_ts, order = c(7,0,7), seasonal = list(order = c(1,0,1)))
midwest_sarima <- arima(midwest_train_ts, order = c(8,1,8), seasonal = list(order = c(1,1,1)))
south_sarima <- arima(south_train_ts, order = c(6,1,7), seasonal = list(order = c(1,1,1)))
west_sarima <- arima(west_train_ts, order = c(8,1,8), seasonal = list(order = c(1,1,1)))

# Summary of the SARIMA model
summary(east_sarima)

Call:
arima(x = east_train_ts, order = c(7, 0, 7), seasonal = list(order = c(1, 0, 
    1)))

Coefficients:
         ar1     ar2      ar3     ar4      ar5     ar6     ar7     ma1     ma2
      0.0342  0.0484  -0.0427  0.0451  -0.0446  0.0206  0.8779  0.9194  0.5087
s.e.  0.0317  0.0275   0.0300  0.0239   0.0314  0.0309  0.0288  0.1152  0.1328
         ma3     ma4     ma5     ma6      ma7     sar1     sma1  intercept
      0.4254  0.3642  0.4441  0.5410  -0.0729  -0.3044  -0.1297   188.5030
s.e.  0.0946  0.0748  0.0667  0.0664   0.0761   0.1027   0.1221   100.8286

sigma^2 estimated as 6127:  log likelihood = -5909.76,  aic = 11855.52

Training set error measures:
                    ME     RMSE      MAE MPE MAPE      MASE        ACF1
Training set 0.3101742 78.27488 44.82764 NaN  Inf 0.6875426 -0.00384897
Code
summary(midwest_sarima)

Call:
arima(x = midwest_train_ts, order = c(8, 1, 8), seasonal = list(order = c(1, 
    1, 1)))

Coefficients:
         ar1      ar2      ar3      ar4      ar5      ar6     ar7      ar8
      0.0621  -0.4273  -0.4397  -0.4091  -0.4365  -0.4075  0.5256  -0.4589
s.e.  0.1026   0.0714   0.0707   0.0700   0.0692   0.0694  0.0685   0.0735
          ma1      ma2     ma3      ma4     ma5      ma6      ma7     ma8
      -0.7601  -0.0282  0.0695  -0.0351  0.0097  -0.0825  -0.4900  0.3167
s.e.   0.1573   0.0792  0.0773   0.0474  0.0457   0.0473   0.0437  0.0808
         sar1     sma1
      -0.4169  -0.5821
s.e.   0.0919   0.1443

sigma^2 estimated as 11967:  log likelihood = -6245.66,  aic = 12529.33

Training set error measures:
                    ME     RMSE      MAE MPE MAPE     MASE         ACF1
Training set -3.367961 109.2882 64.75265 NaN  Inf 0.474939 -0.005562989
Code
summary(south_sarima)

Call:
arima(x = south_train_ts, order = c(6, 1, 7), seasonal = list(order = c(1, 1, 
    1)))

Coefficients:
          ar1      ar2      ar3      ar4      ar5      ar6     ma1      ma2
      -0.9780  -0.9563  -0.9700  -0.9337  -0.9553  -0.9134  0.0408  -0.2230
s.e.   0.0223   0.0189   0.0222   0.0230   0.0184   0.0222  0.0625   0.0583
          ma3      ma4      ma5     ma6      ma7     sar1     sma1
      -0.1473  -0.1621  -0.0390  0.0602  -0.5296  -0.2280  -0.6225
s.e.   0.0440   0.0448   0.0429  0.0434   0.0430   0.0824   0.0520

sigma^2 estimated as 36788:  log likelihood = -6816.9,  aic = 13665.8

Training set error measures:
                    ME     RMSE      MAE MPE MAPE      MASE         ACF1
Training set -5.319775 191.6155 110.4112 NaN  Inf 0.5394106 -0.002442382
Code
summary(west_sarima)

Call:
arima(x = west_train_ts, order = c(8, 1, 8), seasonal = list(order = c(1, 1, 
    1)))

Coefficients:
         ar1      ar2      ar3      ar4      ar5     ar6     ar7      ar8
      0.1378  -0.0262  -0.0166  -0.0177  -0.0342  0.0096  0.9038  -0.1154
s.e.  0.3119   0.0243   0.0220   0.0237   0.0203  0.0230  0.0208   0.2866
          ma1      ma2      ma3      ma4     ma5      ma6     ma7     ma8
      -0.9125  -0.0581  -0.0248  -0.0329  0.0864  -0.0295  -0.549  0.5205
s.e.   0.0789   0.1206   0.0821   0.0522  0.0502   0.0507   0.060  0.0404
         sar1     sma1
      -0.1068  -0.8832
s.e.   0.3247   0.0388

sigma^2 estimated as 7446:  log likelihood = -6002.07,  aic = 12042.15

Training set error measures:
                    ME     RMSE      MAE MPE MAPE      MASE         ACF1
Training set -1.192272 86.20422 50.14961 NaN  Inf 0.4991203 -0.001785725
Code
## Forecasting with the SARIMA model -------------------------------------------
east_values <- forecast(east_sarima, h = length(east_test_ts))
midwest_values <- forecast(midwest_sarima, h = length(midwest_test_ts))
south_values <- forecast(south_sarima, h = length(south_test_ts))
west_values <- forecast(west_sarima, h = length(west_test_ts))

7.2 Accuracy

Code
## Plotting & Accuracy

## East Region
east_initial_sarima_acc <- forecast::accuracy(east_values)

# east_initial_sarima_acc %>% 
#   kbl(caption = "<center>Initial SARIMA East Accuracy<center>") %>% 
#   kable_styling()

east_pred <- as.data.frame(east_values$mean)

east_pred2 <- cbind(east_pred, east_test_plot)
east_initial_sarima_mae <-mae(east_pred2$daily_deaths, east_pred2$x)
east_initial_sarima_mase <-mase(east_pred2$daily_deaths, east_pred2$x)

### East Region
east_initial_sarima_plot <-ggplot(east_pred2) +
  geom_line(aes(x = num, y = daily_deaths, color = "dodgerblue"), linewidth = .75) +
  geom_line(aes(x = num, y = x, color = "orange"), linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Nov 2022", "Jan 2023", "March 2023")) +
  labs(title = "East Region Initial SARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("dodgerblue", "orange"), labels = c("Actual", "Predicted"))+
  theme_bw() +
  theme(legend.title = element_blank())

east_initial_sarima_plot

Code
## Midwest Region

midwest_initial_sarima_acc <- forecast::accuracy(midwest_values)

# midwest_initial_sarima_acc %>% 
#   kbl(caption = "<center>Initial SARIMA Midwest Accuracy<center>") %>% 
#   kable_styling()

midwest_pred <- as.data.frame(midwest_values$mean)

midwest_pred2 <- cbind(midwest_pred, midwest_test_plot)

midwest_pred2 <- midwest_pred2 %>% 
  mutate(x = case_when((x < 0) ~ 0,
                       .default = x))

midwest_initial_sarima_mae <-mae(midwest_pred2$daily_deaths, midwest_pred2$x)
midwest_initial_sarima_mase <-mase(midwest_pred2$daily_deaths, midwest_pred2$x)

midwest_initial_sarima_plot <-ggplot(midwest_pred2) +
  geom_line(aes(x = num, y = daily_deaths, color = "dodgerblue"), linewidth = .75) +
  geom_line(aes(x = num, y = x, color = "orange"), linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Dec 2022", "Jan 2023", "March 2023")) +
  labs(title = "Midwest Region Initial SARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("dodgerblue", "orange"), labels = c("Actual", "Predicted")) +
  theme_bw() +
  theme(legend.title = element_blank())

midwest_initial_sarima_plot

Code
## South Region
south_initial_sarima_acc <- forecast::accuracy(south_values)

# south_initial_sarima_acc %>%
#   kbl(caption = "<center>Initial SARIMA South Accuracy<center>") %>%
#   kable_styling()

south_pred <- as.data.frame(south_values$mean)

south_pred2 <- cbind(south_pred, south_test_plot)

south_pred2 <- south_pred2 %>% 
  mutate(x = case_when((x < 0) ~ 0,
                       .default = x))

south_initial_sarima_mae <-mae(south_pred2$daily_deaths, south_pred2$x)
south_initial_sarima_mase <-mase(south_pred2$daily_deaths, south_pred2$x)

south_initial_sarima_plot <-ggplot(south_pred2) +
  geom_line(aes(x = num, y = daily_deaths, color = "dodgerblue"), linewidth = .75) +
  geom_line(aes(x = num, y = x, color = "orange"), linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Dec 2022", "Jan 2023", "March 2023")) +
  labs(title = "South Region Initial SARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("dodgerblue", "orange"), labels = c("Actual", "Predicted")) +
  theme_bw() +
  theme(legend.title = element_blank()) 

south_initial_sarima_plot

Code
## West Region

west_initial_sarima_acc <- forecast::accuracy(west_values)

# west_initial_sarima_acc %>% 
#   kbl(caption = "<center>Initial SARIMA West Accuracy<center>") %>% 
#   kable_styling()

west_pred <- as.data.frame(west_values$mean)

west_pred2 <- cbind(west_pred, west_test_plot)

# west_pred2 <- west_pred2 %>% 
#   mutate(x = case_when((x < 0) ~ 0,
#                        .default = x))

west_initial_sarima_mae <-mae(west_pred2$daily_deaths, west_pred2$x)
west_initial_sarima_mase <-mase(west_pred2$daily_deaths, west_pred2$x)

west_initial_sarima_plot <-ggplot(west_pred2) +
  geom_line(aes(x = num, y = daily_deaths, color = "dodgerblue"), linewidth = .75) +
  geom_line(aes(x = num, y = x, color = "orange"), linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Dec 2022", "Jan 2023", "March 2023")) +
  labs(title = "West Region Initial SARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("dodgerblue", "orange"), labels = c("Actual", "Predicted")) +
  theme_bw() +
  theme(legend.title = element_blank())

west_initial_sarima_plot

7.3 Grid Search

Code
## Define the parameter grids --------------------------------------------------
p_grid <- 0:6  # AR parameter
d_grid <- 0:0  # Differencing parameter
q_grid <- 0:6  # MA parameter
P_grid <- 0:1  # Seasonal AR parameter
D_grid <- 0:0  # Seasonal differencing parameter
Q_grid <- 0:1  # Seasonal MA parameter
s <- 7         # Seasonal period



## East Grid Search ------------------------------------------------------------

# Create East Region
east_train <- east_train %>% 
  select(daily_deaths)

# Create an empty data frame to store the results
east_results <- data.frame(order = character(),
                      seasonal = character(),
                      AIC = numeric(),
                      stringsAsFactors = FALSE)

# Perform grid search
for (p in p_grid) {
  for (d in d_grid) {
    for (q in q_grid) {
      for (P in P_grid) {
        for (D in D_grid) {
          for (Q in Q_grid) {
            order <- c(p, d, q)
            seasonal <- c(P, D, Q, s)
            model <- tryCatch(
              {
                fit <- arima(east_train, order = order, seasonal = seasonal)
                AIC_val <- AIC(fit)
                # Append the results to the data frame
                east_results <- rbind(east_results, data.frame(order = paste(order, collapse = ", "),
                                                     seasonal = paste(seasonal, collapse = ", "),
                                                     AIC = AIC_val))
              },
              error = function(e) NULL
            )
          }
        }
      }
    }
  }
}

d_grid <- 1  # Differencing parameter
D_grid <- 1  # Differencing parameter
## Midwest Grid Search ------------------------------------------------------------

# Create Midwest Region
midwest_train <- midwest_train %>% 
  select(daily_deaths)

# Create an empty data frame to store the results
midwest_results <- data.frame(order = character(),
                           seasonal = character(),
                           AIC = numeric(),
                           stringsAsFactors = FALSE)

# Perform grid search
for (p in p_grid) {
  for (d in d_grid) {
    for (q in q_grid) {
      for (P in P_grid) {
        for (D in D_grid) {
          for (Q in Q_grid) {
            order <- c(p, d, q)
            seasonal <- c(P, D, Q, s)
            model <- tryCatch(
              {
                fit <- arima(midwest_train, order = order, seasonal = seasonal)
                AIC_val <- AIC(fit)
                # Append the results to the data frame
                midwest_results <- rbind(midwest_results, data.frame(order = paste(order, collapse = ", "),
                                                               seasonal = paste(seasonal, collapse = ", "),
                                                               AIC = AIC_val))
              },
              error = function(e) NULL
            )
          }
        }
      }
    }
  }
}

## South Grid Search ------------------------------------------------------------

# Create South Region
south_train <- south_train %>% 
  select(daily_deaths)

# Create an empty data frame to store the results
south_results <- data.frame(order = character(),
                           seasonal = character(),
                           AIC = numeric(),
                           stringsAsFactors = FALSE)

# Perform grid search
for (p in p_grid) {
  for (d in d_grid) {
    for (q in q_grid) {
      for (P in P_grid) {
        for (D in D_grid) {
          for (Q in Q_grid) {
            order <- c(p, d, q)
            seasonal <- c(P, D, Q, s)
            model <- tryCatch(
              {
                fit <- arima(south_train, order = order, seasonal = seasonal)
                AIC_val <- AIC(fit)
                # Append the results to the data frame
                south_results <- rbind(south_results, data.frame(order = paste(order, collapse = ", "),
                                                               seasonal = paste(seasonal, collapse = ", "),
                                                               AIC = AIC_val))
              },
              error = function(e) NULL
            )
          }
        }
      }
    }
  }
}


## West Grid Search ------------------------------------------------------------

# Create West Region
west_train <- west_train %>% 
  select(daily_deaths)

# Create an empty data frame to store the results
west_results <- data.frame(order = character(),
                           seasonal = character(),
                           AIC = numeric(),
                           stringsAsFactors = FALSE)

# Perform grid search
for (p in p_grid) {
  for (d in d_grid) {
    for (q in q_grid) {
      for (P in P_grid) {
        for (D in D_grid) {
          for (Q in Q_grid) {
            order <- c(p, d, q)
            seasonal <- c(P, D, Q, s)
            model <- tryCatch(
              {
                fit <- arima(west_train, order = order, seasonal = seasonal)
                AIC_val <- AIC(fit)
                # Append the results to the data frame
                west_results <- rbind(west_results, data.frame(order = paste(order, collapse = ", "),
                                                               seasonal = paste(seasonal, collapse = ", "),
                                                               AIC = AIC_val))
              },
              error = function(e) NULL
            )
          }
        }
      }
    }
  }
}
Code
save(east_results, midwest_results, south_results, west_results, file = "results/sarima_grid.rda")
Code
load("results/sarima_grid.rda")

Best East SARIMA

Code
# Find the best model (minimum AIC)
best_east_model <- east_results[which.min(east_results$AIC), ]
print(best_east_model)
      order   seasonal      AIC
139 4, 0, 6 1, 0, 0, 7 11772.62

Best Midwest SARIMA

Code
# Midwest Results
# Find the best model (minimum AIC)
best_midwest_model <- midwest_results[which.min(midwest_results$AIC), ]
print(best_midwest_model)
      order   seasonal      AIC
107 3, 1, 6 1, 1, 1, 7 12520.59

Best South SARIMA

Code
# South Results
# Find the best model (minimum AIC)
best_south_model <- south_results[which.min(south_results$AIC), ]
print(best_south_model)
      order   seasonal      AIC
164 5, 1, 6 1, 1, 1, 7 13656.46

Best West SARIMA

Code
# Find the best model (minimum AIC)
best_west_model <- west_results[which.min(west_results$AIC), ]
print(best_west_model)
      order   seasonal      AIC
187 6, 1, 5 0, 1, 1, 7 12042.41

7.4 Rebuilding SARIMA

Code
# Rebuilding SARIMA Models -----------------------------------------------------
east_sarima <- arima(east_train_ts, order = c(4, 0, 6), 
                     seasonal = list(order = c(1, 0, 0), period = 7))
midwest_sarima <- arima(midwest_train_ts, order = c(3, 1, 6),
                        seasonal = list(order = c(0, 1, 1), period = 7))
south_sarima <- arima(south_train_ts, order = c(5, 1, 6),
                           seasonal = list(order = c(1, 1, 0), period = 7))
west_sarima <- arima(west_train_ts, order = c(6, 1, 5),
                     seasonal = list(order = c(0, 1, 1), period = 7))


## Forecasting with the NEW SARIMA model ---------------------------------------
east_values <- forecast(east_sarima, h = length(east_test_ts))
midwest_values <- forecast(midwest_sarima, h = length(midwest_test_ts))
south_values <- forecast(south_sarima, h = length(south_test_ts))
west_values <- forecast(west_sarima, h = length(west_test_ts))
Code
## Plotting & Accuracy

## East Region
east_tuned_sarima_acc <- forecast::accuracy(east_values)

# east_tuned_sarima_acc %>% 
#   kbl(caption = "<center>Tuned SARIMA East Accuracy<center>") %>% 
#   kable_styling()

east_pred <- as.data.frame(east_values$mean)

east_pred2 <- cbind(east_pred, east_test_plot)
east_tuned_sarima_mae <-mae(east_pred2$daily_deaths, east_pred2$x)
east_tuned_sarima_mase <-mase(east_pred2$daily_deaths, east_pred2$x)

### East Region
east_tuned_sarima_plot <-ggplot(east_pred2) +
  geom_line(aes(x = num, y = daily_deaths, color = "dodgerblue"), linewidth = .75) +
  geom_line(aes(x = num, y = x, color = "orange"), linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Nov 2022", "Jan 2023", "March 2023")) +
  labs(title = "East Region Tuned SARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("dodgerblue", "orange"), labels = c("Actual", "Predicted"))+
  theme_bw() +
  theme(legend.title = element_blank())

east_tuned_sarima_plot

Code
## Midwest Region

midwest_tuned_sarima_acc <- forecast::accuracy(midwest_values)

# midwest_tuned_sarima_acc %>% 
#   kbl(caption = "<center>Tuned SARIMA Midwest Accuracy<center>") %>% 
#   kable_styling()

midwest_pred <- as.data.frame(midwest_values$mean)
midwest_pred2 <- cbind(midwest_pred, midwest_test_plot)
midwest_pred2 <- midwest_pred2 %>% 
   mutate(x = case_when((x < 0) ~ 0,
                        .default = x))
midwest_tuned_sarima_mae <-mae(midwest_pred2$daily_deaths, midwest_pred2$x)
midwest_tuned_sarima_mase <-mase(midwest_pred2$daily_deaths, midwest_pred2$x)

midwest_tuned_sarima_plot <-ggplot(midwest_pred2) +
  geom_line(aes(x = num, y = daily_deaths, color = "dodgerblue"), linewidth = .75) +
  geom_line(aes(x = num, y = x, color = "orange"), linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Dec 2022", "Jan 2023", "March 2023")) +
  labs(title = "Midwest Region Tuned SARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("dodgerblue", "orange"), labels = c("Actual", "Predicted")) +
  theme_bw() +
  theme(legend.title = element_blank())

midwest_tuned_sarima_plot

Code
## South Region
south_tuned_sarima_acc <- forecast::accuracy(south_values)

# south_tuned_sarima_acc %>%
#   kbl(caption = "<center>Tuned SARIMA South Accuracy<center>") %>%
#   kable_styling()

south_pred <- as.data.frame(south_values$mean)

south_pred2 <- cbind(south_pred, south_test_plot)
south_pred2 <- south_pred2 %>%
  mutate(x = case_when((x < 0) ~ 0,
                       .default = x))
south_tuned_sarima_mae <-mae(south_pred2$daily_deaths, south_pred2$x)
south_tuned_sarima_mase <-mase(south_pred2$daily_deaths, south_pred2$x)

south_tuned_sarima_plot <-ggplot(south_pred2) +
  geom_line(aes(x = num, y = daily_deaths, color = "dodgerblue"), linewidth = .75) +
  geom_line(aes(x = num, y = x, color = "orange"), linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Dec 2022", "Jan 2023", "March 2023")) +
  labs(title = "South Region Tuned SARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("dodgerblue", "orange"), labels = c("Actual", "Predicted")) +
  theme_bw() +
  theme(legend.title = element_blank()) 

south_tuned_sarima_plot

Code
# mase(south_pred2$daily_deaths, south_pred2$x)

## West Region

west_tuned_sarima_acc <- forecast::accuracy(west_values)

# west_tuned_sarima_acc %>% 
#   kbl(caption = "<center>Tuned SARIMA West Accuracy<center>") %>% 
#   kable_styling()

west_pred <- as.data.frame(west_values$mean)

west_pred2 <- cbind(west_pred, west_test_plot)
west_tuned_sarima_mae <-mae(west_pred2$daily_deaths, west_pred2$x)
west_tuned_sarima_mase <-mase(west_pred2$daily_deaths, west_pred2$x)

# west_pred2 <- west_pred2 %>% 
#   mutate(x = case_when((x < 0) ~ 0,
#                        .default = x))


west_tuned_sarima_plot <-ggplot(west_pred2) +
  geom_line(aes(x = num, y = daily_deaths, color = "dodgerblue"), linewidth = .75) +
  geom_line(aes(x = num, y = x, color = "orange"), linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Dec 2022", "Jan 2023", "March 2023")) +
  labs(title = "West Region Tuned SARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("dodgerblue", "orange"), labels = c("Actual", "Predicted")) +
  theme_bw() +
  theme(legend.title = element_blank())

west_tuned_sarima_plot

Code
# saving results
save(east_tuned_sarima_acc, east_initial_sarima_acc, east_initial_sarima_plot, east_tuned_sarima_plot,
     midwest_tuned_sarima_acc, midwest_initial_sarima_acc, midwest_initial_sarima_plot, midwest_tuned_sarima_plot,
     south_tuned_sarima_acc, south_initial_sarima_acc, south_initial_sarima_plot, south_tuned_sarima_plot,
     west_tuned_sarima_acc, west_initial_sarima_acc, west_initial_sarima_plot, west_tuned_sarima_plot,
     east_initial_sarima_mae, east_initial_sarima_mase, east_tuned_sarima_mae, east_tuned_sarima_mase,
     midwest_initial_sarima_mae, midwest_initial_sarima_mase, midwest_tuned_sarima_mae, midwest_tuned_sarima_mase,
     south_initial_sarima_mae, south_initial_sarima_mase, south_tuned_sarima_mae, south_tuned_sarima_mase,
     west_initial_sarima_mae, west_initial_sarima_mase, west_tuned_sarima_mae, west_tuned_sarima_mase,
     file = "results/uni_sarima.rda"
     )

8. Auto ARIMA Model

8.1 Building AutoARIMA

Code
# Building AutoARIMA Model -----------------------------------------------------
east_arima <- auto.arima(east_train_ts, seasonal = TRUE)
midwest_arima <- auto.arima(midwest_train_ts, seasonal = TRUE)
south_arima <- auto.arima(south_train_ts, seasonal = TRUE)
west_arima <- auto.arima(west_train_ts, seasonal = TRUE)

# Summary of the ARIMA model
summary(east_arima)
Series: east_train_ts 
ARIMA(5,1,2) 

Coefficients:
         ar1      ar2      ar3      ar4      ar5      ma1     ma2
      0.5142  -0.7304  -0.1394  -0.3479  -0.2207  -1.0295  0.7353
s.e.  0.0480   0.0344   0.0426   0.0329   0.0418   0.0410  0.0265

sigma^2 = 6861:  log likelihood = -5956.39
AIC=11928.79   AICc=11928.93   BIC=11968.22

Training set error measures:
                    ME     RMSE     MAE MPE MAPE      MASE       ACF1
Training set 0.1506172 82.50546 47.1043 NaN  Inf 0.7224607 0.01707355
Code
summary(midwest_arima)
Series: midwest_train_ts 
ARIMA(5,1,3) 

Coefficients:
         ar1      ar2      ar3      ar4      ar5      ma1     ma2      ma3
      0.1952  -0.6269  -0.3389  -0.2999  -0.4563  -1.2016  0.9332  -0.3022
s.e.  0.0607   0.0312   0.0356   0.0276   0.0442   0.0702  0.0718   0.0382

sigma^2 = 21712:  log likelihood = -6544.65
AIC=13107.31   AICc=13107.49   BIC=13151.67

Training set error measures:
                    ME     RMSE      MAE MPE MAPE      MASE        ACF1
Training set 0.3932594 146.6988 85.10252 NaN  Inf 0.6241985 -0.03369519
Code
summary(south_arima)
Series: south_train_ts 
ARIMA(5,1,4) 

Coefficients:
         ar1      ar2      ar3      ar4      ar5      ma1     ma2      ma3
      0.1881  -0.9049  -0.0800  -0.4542  -0.5567  -0.9401  1.0118  -0.7340
s.e.  0.0349   0.0292   0.0464   0.0251   0.0303   0.0357  0.0434   0.0525
         ma4
      0.4897
s.e.  0.0402

sigma^2 = 40663:  log likelihood = -6864.82
AIC=13749.64   AICc=13749.86   BIC=13798.93

Training set error measures:
                    ME     RMSE      MAE MPE MAPE      MASE        ACF1
Training set 0.1420877 200.6629 119.7558 NaN  Inf 0.5850634 -0.06523796
Code
summary(west_arima)
Series: west_train_ts 
ARIMA(4,1,3) 

Coefficients:
          ar1     ar2      ar3      ar4      ma1      ma2     ma3
      -0.0876  0.1258  -0.5602  -0.4779  -0.5985  -0.3463  0.5343
s.e.   0.0505  0.0450   0.0330   0.0309   0.0509   0.0784  0.0421

sigma^2 = 11991:  log likelihood = -6241.45
AIC=12498.9   AICc=12499.04   BIC=12538.33

Training set error measures:
                    ME     RMSE      MAE MPE MAPE      MASE        ACF1
Training set 0.1543984 109.0727 72.43011 NaN  Inf 0.7208697 -0.08454213
Code
## Forecasting Test Data -------------------------------------------------------
east_values <- forecast(east_arima, h = length(east_test_ts))
midwest_values <- forecast(midwest_arima, h = length(midwest_test_ts))
south_values <- forecast(south_arima, h = length(south_test_ts))
west_values <- forecast(west_arima, h = length(west_test_ts))

8.2 Accuracy

Code
## Plotting & Accuracy

## East Region
east_auto_acc <- forecast::accuracy(east_values)

# east_auto_acc  %>% 
#   kbl(caption = "<center>Auto ARIMA East Accuracy<center>") %>% 
#   kable_styling()

east_pred <- as.data.frame(east_values$mean)

east_pred2 <- cbind(east_pred, east_test_plot)
east_pred2 <- east_pred2 %>% 
  mutate(x = case_when((x < 0) ~ 0,
                       .default = x))

east_auto_mae <-mae(east_pred2$daily_deaths, east_pred2$x)
east_auto_mase <-mase(east_pred2$daily_deaths, east_pred2$x)


east_auto_plot <- ggplot(east_pred2) +
  geom_line(aes(x = num, y = daily_deaths, color = "dodgerblue"), linewidth = .75) +
  geom_line(aes(x = num, y = x, color = "orange"), linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Nov 2022", "Jan 2023", "March 2023")) +
  labs(title = "East Region Auto ARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("dodgerblue","orange"), labels = c("Actual", "Predicted"))+
  theme_bw() +
  theme(legend.title = element_blank())

east_auto_plot 

Code
## Midwest Region
midwest_auto_acc <- forecast::accuracy(midwest_values)

# midwest_auto_acc %>% 
#   kbl(caption = "<center>Auto ARIMA Midwest Accuracy<center>") %>% 
#   kable_styling()

midwest_pred <- as.data.frame(midwest_values$mean)

midwest_pred2 <- cbind(midwest_pred, midwest_test_plot)

midwest_pred2 <- midwest_pred2 %>% 
  mutate(x = case_when((x < 0) ~ 0,
                       .default = x))
midwest_auto_mae <-mae(midwest_pred2$daily_deaths, midwest_pred2$x)
midwest_auto_mase <-mase(midwest_pred2$daily_deaths, midwest_pred2$x)

midwest_auto_plot <-ggplot(midwest_pred2) +
  geom_line(aes(x = num, y = daily_deaths, color = "dodgerblue"), linewidth = .75) +
  geom_line(aes(x = num, y = x, color = "orange"), linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Dec 2022", "Jan 2023", "March 2023")) +
  labs(title = "Midwest Region Auto ARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("dodgerblue", "orange"), labels = c("Actual", "Predicted")) +
  theme_bw() +
  theme(legend.title = element_blank())

midwest_auto_plot 

Code
## South Region
south_auto_acc <- forecast::accuracy(south_values)

# south_auto_acc %>% 
#   kbl(caption = "<center>Auto ARIMA South Accuracy<center>") %>% 
#   kable_styling()

south_pred <- as.data.frame(south_values$mean)

south_pred2 <- cbind(south_pred, south_test_plot)

south_pred2 <- south_pred2 %>% 
  mutate(x = case_when((x < 0) ~ 0,
                       .default = x))

south_auto_mae <-mae(south_pred2$daily_deaths, south_pred2$x)
south_auto_mase <-mase(south_pred2$daily_deaths, south_pred2$x)

south_auto_plot <- ggplot(south_pred2) +
  geom_line(aes(x = num, y = daily_deaths, color = "dodgerblue"), linewidth = .75) +
  geom_line(aes(x = num, y = x, color = "orange"), linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Dec 2022", "Jan 2023", "March 2023")) +
  labs(title = "South Region Auto ARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("dodgerblue", "orange"), labels = c("Actual", "Predicted")) +
  theme_bw() +
  theme(legend.title = element_blank()) 

south_auto_plot

Code
## West Region
west_auto_acc <- forecast::accuracy(west_values)

# west_auto_acc %>% 
#   kbl(caption = "<center>Auto ARIMA West Accuracy<center>") %>% 
#   kable_styling()

west_pred <- as.data.frame(west_values$mean)

west_pred2 <- cbind(west_pred, west_test_plot)

west_pred2 <- west_pred2 %>% 
  mutate(x = case_when((x < 0) ~ 0,
                       .default = x))

west_auto_mae <-mae(west_pred2$daily_deaths, west_pred2$x)
west_auto_mase <-mase(west_pred2$daily_deaths, west_pred2$x)

west_auto_plot <- ggplot(west_pred2) +
  geom_line(aes(x = num, y = daily_deaths, color = "dodgerblue"), linewidth = .75) +
  geom_line(aes(x = num, y = x, color = "orange"), linewidth = .75) +
  scale_x_continuous(limits = c(1023,1136),
                     breaks = c(1023, 1069, 1115),
                     labels = c("Dec 2022", "Jan 2023", "March 2023")) +
  labs(title = "West Region Auto ARIMA",
       y = NULL,
       x = NULL) +
  scale_color_manual(values = c("dodgerblue", "orange"), labels = c("Actual", "Predicted")) +
  theme_bw() +
  theme(legend.title = element_blank())

west_auto_plot

Code
# saving results
save(east_auto_plot,
     midwest_auto_plot,
     south_auto_plot,
     west_auto_plot,
     east_auto_acc,
     midwest_auto_acc,
     south_auto_acc,
     west_auto_acc,
     east_auto_mae,
     east_auto_mase,
     midwest_auto_mae,
     midwest_auto_mase,
     south_auto_mae,
     south_auto_mase,
     west_auto_mae,
     west_auto_mase,
     file = "results/uni_autoarima.rda")

9. Compare Arima Models

Code
load("results/uni_arima.rda")
load("results/uni_sarima.rda")
load("results/uni_autoarima.rda")

East Results

Code
# east1 <- east_initial_arima_acc %>%
#   as.data.frame()
# 
# east2 <- east_tuned_arima_acc %>%
#   as.data.frame()
# 
# east3 <- east_initial_sarima_acc%>%
#   as.data.frame()
# 
# east4 <- east_tuned_sarima_acc%>%
#   as.data.frame()
# 
# east5 <- east_auto_acc%>%
#   as.data.frame()

tribble(
  ~model, ~mae, ~mase,
  #--|--|----
  "initial arima", east_initial_arima_mae, east_initial_arima_mase,
  "tuned arima", east_tuned_arima_mae, east_tuned_arima_mase,
  "initial sarima", east_initial_sarima_mae, east_initial_sarima_mase,
  "tuned sarima", east_tuned_sarima_mae, east_tuned_sarima_mase,
  "auto arima", east_auto_mae, east_auto_mase,
) %>% 
  kbl() %>% 
  kable_styling()
model mae mase
initial arima 132.49240 1.4959673
tuned arima 82.33062 0.9295923
initial sarima 81.75051 0.9230423
tuned sarima 97.78422 1.1040784
auto arima 59.06023 0.6668471

Midwest Results

Code
# midwest1 <- midwest_initial_arima_acc %>% 
#   as.data.frame()
# 
# midwest2 <- midwest_tuned_arima_acc %>% 
#   as.data.frame()
# 
# midwest3 <- midwest_initial_sarima_acc%>% 
#   as.data.frame() 
# 
# midwest4 <- midwest_tuned_sarima_acc%>% 
#   as.data.frame()
# 
# midwest5 <- midwest_auto_acc%>% 
#   as.data.frame()

tribble(
  ~model, ~mae, ~mase,
  #--|--|----
  "initial arima", midwest_initial_arima_mae, midwest_initial_arima_mase,
  "tuned arima", midwest_tuned_arima_mae, midwest_tuned_arima_mase,
  "initial sarima", midwest_initial_sarima_mae, midwest_initial_sarima_mase,
  "tuned sarima", midwest_tuned_sarima_mae, midwest_tuned_sarima_mase,
  "auto arima", midwest_auto_mae, midwest_auto_mase,
) %>% 
  kbl() %>% 
  kable_styling()
model mae mase
initial arima 80.60565 0.6965768
tuned arima 64.10060 0.5539437
initial sarima 68.51924 0.5921287
tuned sarima 60.59172 0.5236207
auto arima 64.09667 0.5539097

South Results

Code
# south1 <- south_initial_arima_acc %>% 
#   as.data.frame()
# 
# south2 <- south_tuned_arima_acc %>% 
#   as.data.frame()
# 
# south3 <- south_initial_sarima_acc%>% 
#   as.data.frame() 
# 
# south4 <- south_tuned_sarima_acc%>% 
#   as.data.frame()
# 
# south5 <- south_auto_acc%>% 
#   as.data.frame()

tribble(
  ~model, ~mae, ~mase,
  #--|--|----
  "initial arima", south_initial_arima_mae, south_initial_arima_mase,
  "tuned arima", south_tuned_arima_mae, south_tuned_arima_mase,
  "initial sarima", south_initial_sarima_mae, south_initial_sarima_mase,
  "tuned sarima", south_tuned_sarima_mae, south_tuned_sarima_mase,
  "auto arima", south_auto_mae, south_auto_mase,
) %>% 
  kbl() %>% 
  kable_styling()
model mae mase
initial arima 125.7716 0.9020594
tuned arima 115.0456 0.5951632
initial sarima 114.9575 0.5947074
tuned sarima 102.1624 0.5285149
auto arima 129.0046 0.6673773

West Results

Code
# west1 <- west_initial_arima_acc %>% 
#   as.data.frame()
# 
# west2 <- west_tuned_arima_acc %>% 
#   as.data.frame()
# 
# west3 <- west_initial_sarima_acc%>% 
#   as.data.frame() 
# 
# west4 <- west_tuned_sarima_acc%>% 
#   as.data.frame()
# 
# west5 <- west_auto_acc%>% 
#   as.data.frame()

tribble(
  ~model, ~mae, ~mase,
  #--|--|----
  "initial arima", west_initial_arima_mae, west_initial_arima_mase,
  "tuned arima", west_tuned_arima_mae, west_tuned_arima_mase,
  "initial sarima", west_initial_sarima_mae, west_initial_sarima_mase,
  "tuned sarima", west_tuned_sarima_mae, west_tuned_sarima_mase,
  "auto arima", west_auto_mae, west_auto_mase,
) %>% 
  kbl() %>% 
  kable_styling()
model mae mase
initial arima 65.95899 0.7373000
tuned arima 48.93596 0.5470139
initial sarima 49.73624 0.5559596
tuned sarima 39.48698 0.4413918
auto arima 69.21746 0.7737237